'''
Created on 2011-11-21

@author: Administrator
'''
import urllib2
import datetime
class BBStock():
    """Scrape company rows from Bloomberg's China company listing pages.

    The listing is paginated; each page is fetched over HTTP and the rows of
    its ``<table class="ticker_data">`` element are extracted with plain
    string searches (no HTML parser is involved).
    """

    # Page N of the listing lives at _LISTING_URL + str(N) + '/'.
    _LISTING_URL = 'http://www.bloomberg.com/markets/companies/country/china/'
    # Opening tag of the table that holds the ticker rows.
    _TABLE_OPEN = '<table class="ticker_data">'

    def stocklist(self,reg):
        """Placeholder: not implemented, always returns None."""
        pass

    def _parse_row(self, src):
        """Return the non-empty cell texts of one ``<tr`` fragment.

        ``src`` is split on ``<td``.  For every piece that contains a closing
        ``</td``, the cell text is whatever follows the last ``>`` before the
        closing ``</span`` (when the cell has one) or before ``</td``.
        """
        cells = []
        for fragment in src.split('<td'):
            end = fragment.find('</td')
            if end == -1:
                # Not a complete cell (text before the first <td, a header
                # row made of <th>, ...).  Using -1 as a slice end here would
                # silently turn leftover markup into a bogus cell value.
                continue
            span_end = fragment.rfind('</span', 0, end)
            if span_end > 0:
                # The visible text sits inside a <span>; stop at its end tag.
                end = span_end
            start = fragment.rfind('>', 0, end)
            if start == -1:
                continue
            text = fragment[start:end].strip('>\n\t ')
            if text:
                cells.append(text)
        return cells

    def _parse_page(self, content):
        """Return the rows of the ticker table found in ``content``.

        Each row is a list of cell strings; rows without any non-empty cell
        are dropped.  An empty list is returned when the table is missing.
        """
        start = content.find(self._TABLE_OPEN)
        if start == -1:
            return []
        end = content.find('</table', start)
        if end == -1:
            # Unterminated table: parse whatever was received.
            end = len(content)
        rows = []
        for fragment in content[start:end].split('<tr'):
            cells = self._parse_row(fragment)
            if cells:
                rows.append(cells)
        return rows

    def _fetch_page(self, page=1):
        """Download listing page ``page`` and return the raw response body."""
        url = self._LISTING_URL + str(page) + '/'
        response = urllib2.urlopen(url)
        try:
            return response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

    def getAstocklistfrombloomberg(self, fetch_page=None, page_size=200):
        """Collect the rows of every listing page, starting at page 1.

        Pages are requested one after another until a page yields a number
        of rows different from ``page_size``; that page is treated as the
        last one.  One progress line per page is printed to stdout.

        Args:
            fetch_page: optional callable taking a page number and returning
                that page's HTML as a string.  Defaults to an HTTP download
                from the Bloomberg listing URL.
            page_size: number of rows a full (non-final) page is expected to
                contain.  Defaults to 200.

        Returns:
            A list of rows, each row being a list of non-empty cell strings.

        Raises:
            Whatever ``fetch_page`` raises; with the default fetcher these
            are the errors raised by ``urllib2.urlopen`` and ``read``.
        """
        if fetch_page is None:
            fetch_page = self._fetch_page
        stocklist = []
        page = 1
        while True:
            rows = self._parse_page(fetch_page(page))
            # Single-argument print form; output matches the previous
            # comma-separated print statement byte for byte.
            print("page: %s  tlist len: %s" % (page, len(rows)))
            stocklist.extend(rows)
            if len(rows) != page_size:
                break
            page += 1
        return stocklist
if __name__ == '__main__':
    # Run one full scrape, then report the number of rows collected followed
    # by the start timestamp and the elapsed time.
    started_at = datetime.datetime.now()
    scraper = BBStock()
    rows = scraper.getAstocklistfrombloomberg()
    print(len(rows))
    elapsed = datetime.datetime.now() - started_at
    # NOTE: timedelta.seconds is the whole-seconds component only; it does
    # not include the days or microseconds parts of the interval.
    print("%s %s" % (started_at, elapsed.seconds))